import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df=pd.read_csv("Employee-Attrition.csv")
df.head()
| Age | Attrition | BusinessTravel | DailyRate | Department | DistanceFromHome | Education | EducationField | EmployeeCount | EmployeeNumber | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 41 | Yes | Travel_Rarely | 1102 | Sales | 1 | 2 | Life Sciences | 1 | 1 | ... | 1 | 80 | 0 | 8 | 0 | 1 | 6 | 4 | 0 | 5 |
| 1 | 49 | No | Travel_Frequently | 279 | Research & Development | 8 | 1 | Life Sciences | 1 | 2 | ... | 4 | 80 | 1 | 10 | 3 | 3 | 10 | 7 | 1 | 7 |
| 2 | 37 | Yes | Travel_Rarely | 1373 | Research & Development | 2 | 2 | Other | 1 | 4 | ... | 2 | 80 | 0 | 7 | 3 | 3 | 0 | 0 | 0 | 0 |
| 3 | 33 | No | Travel_Frequently | 1392 | Research & Development | 3 | 4 | Life Sciences | 1 | 5 | ... | 3 | 80 | 0 | 8 | 3 | 3 | 8 | 7 | 3 | 0 |
| 4 | 27 | No | Travel_Rarely | 591 | Research & Development | 2 | 1 | Medical | 1 | 7 | ... | 4 | 80 | 1 | 6 | 3 | 3 | 2 | 2 | 2 | 2 |
5 rows × 35 columns
df.shape
(1470, 35)
df.describe()
| Age | DailyRate | DistanceFromHome | Education | EmployeeCount | EmployeeNumber | EnvironmentSatisfaction | HourlyRate | JobInvolvement | JobLevel | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.0 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | ... | 1470.000000 | 1470.0 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 |
| mean | 36.923810 | 802.485714 | 9.192517 | 2.912925 | 1.0 | 1024.865306 | 2.721769 | 65.891156 | 2.729932 | 2.063946 | ... | 2.712245 | 80.0 | 0.793878 | 11.279592 | 2.799320 | 2.761224 | 7.008163 | 4.229252 | 2.187755 | 4.123129 |
| std | 9.135373 | 403.509100 | 8.106864 | 1.024165 | 0.0 | 602.024335 | 1.093082 | 20.329428 | 0.711561 | 1.106940 | ... | 1.081209 | 0.0 | 0.852077 | 7.780782 | 1.289271 | 0.706476 | 6.126525 | 3.623137 | 3.222430 | 3.568136 |
| min | 18.000000 | 102.000000 | 1.000000 | 1.000000 | 1.0 | 1.000000 | 1.000000 | 30.000000 | 1.000000 | 1.000000 | ... | 1.000000 | 80.0 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 30.000000 | 465.000000 | 2.000000 | 2.000000 | 1.0 | 491.250000 | 2.000000 | 48.000000 | 2.000000 | 1.000000 | ... | 2.000000 | 80.0 | 0.000000 | 6.000000 | 2.000000 | 2.000000 | 3.000000 | 2.000000 | 0.000000 | 2.000000 |
| 50% | 36.000000 | 802.000000 | 7.000000 | 3.000000 | 1.0 | 1020.500000 | 3.000000 | 66.000000 | 3.000000 | 2.000000 | ... | 3.000000 | 80.0 | 1.000000 | 10.000000 | 3.000000 | 3.000000 | 5.000000 | 3.000000 | 1.000000 | 3.000000 |
| 75% | 43.000000 | 1157.000000 | 14.000000 | 4.000000 | 1.0 | 1555.750000 | 4.000000 | 83.750000 | 3.000000 | 3.000000 | ... | 4.000000 | 80.0 | 1.000000 | 15.000000 | 3.000000 | 3.000000 | 9.000000 | 7.000000 | 3.000000 | 7.000000 |
| max | 60.000000 | 1499.000000 | 29.000000 | 5.000000 | 1.0 | 2068.000000 | 4.000000 | 100.000000 | 4.000000 | 5.000000 | ... | 4.000000 | 80.0 | 3.000000 | 40.000000 | 6.000000 | 4.000000 | 40.000000 | 18.000000 | 15.000000 | 17.000000 |
8 rows × 26 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1470 entries, 0 to 1469 Data columns (total 35 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Age 1470 non-null int64 1 Attrition 1470 non-null object 2 BusinessTravel 1470 non-null object 3 DailyRate 1470 non-null int64 4 Department 1470 non-null object 5 DistanceFromHome 1470 non-null int64 6 Education 1470 non-null int64 7 EducationField 1470 non-null object 8 EmployeeCount 1470 non-null int64 9 EmployeeNumber 1470 non-null int64 10 EnvironmentSatisfaction 1470 non-null int64 11 Gender 1470 non-null object 12 HourlyRate 1470 non-null int64 13 JobInvolvement 1470 non-null int64 14 JobLevel 1470 non-null int64 15 JobRole 1470 non-null object 16 JobSatisfaction 1470 non-null int64 17 MaritalStatus 1470 non-null object 18 MonthlyIncome 1470 non-null int64 19 MonthlyRate 1470 non-null int64 20 NumCompaniesWorked 1470 non-null int64 21 Over18 1470 non-null object 22 OverTime 1470 non-null object 23 PercentSalaryHike 1470 non-null int64 24 PerformanceRating 1470 non-null int64 25 RelationshipSatisfaction 1470 non-null int64 26 StandardHours 1470 non-null int64 27 StockOptionLevel 1470 non-null int64 28 TotalWorkingYears 1470 non-null int64 29 TrainingTimesLastYear 1470 non-null int64 30 WorkLifeBalance 1470 non-null int64 31 YearsAtCompany 1470 non-null int64 32 YearsInCurrentRole 1470 non-null int64 33 YearsSinceLastPromotion 1470 non-null int64 34 YearsWithCurrManager 1470 non-null int64 dtypes: int64(26), object(9) memory usage: 402.1+ KB
df.corr()
| Age | DailyRate | DistanceFromHome | Education | EmployeeCount | EmployeeNumber | EnvironmentSatisfaction | HourlyRate | JobInvolvement | JobLevel | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Age | 1.000000 | 0.010661 | -0.001686 | 0.208034 | NaN | -0.010145 | 0.010146 | 0.024287 | 0.029820 | 0.509604 | ... | 0.053535 | NaN | 0.037510 | 0.680381 | -0.019621 | -0.021490 | 0.311309 | 0.212901 | 0.216513 | 0.202089 |
| DailyRate | 0.010661 | 1.000000 | -0.004985 | -0.016806 | NaN | -0.050990 | 0.018355 | 0.023381 | 0.046135 | 0.002966 | ... | 0.007846 | NaN | 0.042143 | 0.014515 | 0.002453 | -0.037848 | -0.034055 | 0.009932 | -0.033229 | -0.026363 |
| DistanceFromHome | -0.001686 | -0.004985 | 1.000000 | 0.021042 | NaN | 0.032916 | -0.016075 | 0.031131 | 0.008783 | 0.005303 | ... | 0.006557 | NaN | 0.044872 | 0.004628 | -0.036942 | -0.026556 | 0.009508 | 0.018845 | 0.010029 | 0.014406 |
| Education | 0.208034 | -0.016806 | 0.021042 | 1.000000 | NaN | 0.042070 | -0.027128 | 0.016775 | 0.042438 | 0.101589 | ... | -0.009118 | NaN | 0.018422 | 0.148280 | -0.025100 | 0.009819 | 0.069114 | 0.060236 | 0.054254 | 0.069065 |
| EmployeeCount | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| EmployeeNumber | -0.010145 | -0.050990 | 0.032916 | 0.042070 | NaN | 1.000000 | 0.017621 | 0.035179 | -0.006888 | -0.018519 | ... | -0.069861 | NaN | 0.062227 | -0.014365 | 0.023603 | 0.010309 | -0.011240 | -0.008416 | -0.009019 | -0.009197 |
| EnvironmentSatisfaction | 0.010146 | 0.018355 | -0.016075 | -0.027128 | NaN | 0.017621 | 1.000000 | -0.049857 | -0.008278 | 0.001212 | ... | 0.007665 | NaN | 0.003432 | -0.002693 | -0.019359 | 0.027627 | 0.001458 | 0.018007 | 0.016194 | -0.004999 |
| HourlyRate | 0.024287 | 0.023381 | 0.031131 | 0.016775 | NaN | 0.035179 | -0.049857 | 1.000000 | 0.042861 | -0.027853 | ... | 0.001330 | NaN | 0.050263 | -0.002334 | -0.008548 | -0.004607 | -0.019582 | -0.024106 | -0.026716 | -0.020123 |
| JobInvolvement | 0.029820 | 0.046135 | 0.008783 | 0.042438 | NaN | -0.006888 | -0.008278 | 0.042861 | 1.000000 | -0.012630 | ... | 0.034297 | NaN | 0.021523 | -0.005533 | -0.015338 | -0.014617 | -0.021355 | 0.008717 | -0.024184 | 0.025976 |
| JobLevel | 0.509604 | 0.002966 | 0.005303 | 0.101589 | NaN | -0.018519 | 0.001212 | -0.027853 | -0.012630 | 1.000000 | ... | 0.021642 | NaN | 0.013984 | 0.782208 | -0.018191 | 0.037818 | 0.534739 | 0.389447 | 0.353885 | 0.375281 |
| JobSatisfaction | -0.004892 | 0.030571 | -0.003669 | -0.011296 | NaN | -0.046247 | -0.006784 | -0.071335 | -0.021476 | -0.001944 | ... | -0.012454 | NaN | 0.010690 | -0.020185 | -0.005779 | -0.019459 | -0.003803 | -0.002305 | -0.018214 | -0.027656 |
| MonthlyIncome | 0.497855 | 0.007707 | -0.017014 | 0.094961 | NaN | -0.014829 | -0.006259 | -0.015794 | -0.015271 | 0.950300 | ... | 0.025873 | NaN | 0.005408 | 0.772893 | -0.021736 | 0.030683 | 0.514285 | 0.363818 | 0.344978 | 0.344079 |
| MonthlyRate | 0.028051 | -0.032182 | 0.027473 | -0.026084 | NaN | 0.012648 | 0.037600 | -0.015297 | -0.016322 | 0.039563 | ... | -0.004085 | NaN | -0.034323 | 0.026442 | 0.001467 | 0.007963 | -0.023655 | -0.012815 | 0.001567 | -0.036746 |
| NumCompaniesWorked | 0.299635 | 0.038153 | -0.029251 | 0.126317 | NaN | -0.001251 | 0.012594 | 0.022157 | 0.015012 | 0.142501 | ... | 0.052733 | NaN | 0.030075 | 0.237639 | -0.066054 | -0.008366 | -0.118421 | -0.090754 | -0.036814 | -0.110319 |
| PercentSalaryHike | 0.003634 | 0.022704 | 0.040235 | -0.011111 | NaN | -0.012944 | -0.031701 | -0.009062 | -0.017205 | -0.034730 | ... | -0.040490 | NaN | 0.007528 | -0.020608 | -0.005221 | -0.003280 | -0.035991 | -0.001520 | -0.022154 | -0.011985 |
| PerformanceRating | 0.001904 | 0.000473 | 0.027110 | -0.024539 | NaN | -0.020359 | -0.029548 | -0.002172 | -0.029071 | -0.021222 | ... | -0.031351 | NaN | 0.003506 | 0.006744 | -0.015579 | 0.002572 | 0.003435 | 0.034986 | 0.017896 | 0.022827 |
| RelationshipSatisfaction | 0.053535 | 0.007846 | 0.006557 | -0.009118 | NaN | -0.069861 | 0.007665 | 0.001330 | 0.034297 | 0.021642 | ... | 1.000000 | NaN | -0.045952 | 0.024054 | 0.002497 | 0.019604 | 0.019367 | -0.015123 | 0.033493 | -0.000867 |
| StandardHours | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| StockOptionLevel | 0.037510 | 0.042143 | 0.044872 | 0.018422 | NaN | 0.062227 | 0.003432 | 0.050263 | 0.021523 | 0.013984 | ... | -0.045952 | NaN | 1.000000 | 0.010136 | 0.011274 | 0.004129 | 0.015058 | 0.050818 | 0.014352 | 0.024698 |
| TotalWorkingYears | 0.680381 | 0.014515 | 0.004628 | 0.148280 | NaN | -0.014365 | -0.002693 | -0.002334 | -0.005533 | 0.782208 | ... | 0.024054 | NaN | 0.010136 | 1.000000 | -0.035662 | 0.001008 | 0.628133 | 0.460365 | 0.404858 | 0.459188 |
| TrainingTimesLastYear | -0.019621 | 0.002453 | -0.036942 | -0.025100 | NaN | 0.023603 | -0.019359 | -0.008548 | -0.015338 | -0.018191 | ... | 0.002497 | NaN | 0.011274 | -0.035662 | 1.000000 | 0.028072 | 0.003569 | -0.005738 | -0.002067 | -0.004096 |
| WorkLifeBalance | -0.021490 | -0.037848 | -0.026556 | 0.009819 | NaN | 0.010309 | 0.027627 | -0.004607 | -0.014617 | 0.037818 | ... | 0.019604 | NaN | 0.004129 | 0.001008 | 0.028072 | 1.000000 | 0.012089 | 0.049856 | 0.008941 | 0.002759 |
| YearsAtCompany | 0.311309 | -0.034055 | 0.009508 | 0.069114 | NaN | -0.011240 | 0.001458 | -0.019582 | -0.021355 | 0.534739 | ... | 0.019367 | NaN | 0.015058 | 0.628133 | 0.003569 | 0.012089 | 1.000000 | 0.758754 | 0.618409 | 0.769212 |
| YearsInCurrentRole | 0.212901 | 0.009932 | 0.018845 | 0.060236 | NaN | -0.008416 | 0.018007 | -0.024106 | 0.008717 | 0.389447 | ... | -0.015123 | NaN | 0.050818 | 0.460365 | -0.005738 | 0.049856 | 0.758754 | 1.000000 | 0.548056 | 0.714365 |
| YearsSinceLastPromotion | 0.216513 | -0.033229 | 0.010029 | 0.054254 | NaN | -0.009019 | 0.016194 | -0.026716 | -0.024184 | 0.353885 | ... | 0.033493 | NaN | 0.014352 | 0.404858 | -0.002067 | 0.008941 | 0.618409 | 0.548056 | 1.000000 | 0.510224 |
| YearsWithCurrManager | 0.202089 | -0.026363 | 0.014406 | 0.069065 | NaN | -0.009197 | -0.004999 | -0.020123 | 0.025976 | 0.375281 | ... | -0.000867 | NaN | 0.024698 | 0.459188 | -0.004096 | 0.002759 | 0.769212 | 0.714365 | 0.510224 | 1.000000 |
26 rows × 26 columns
df.isnull().any()
Age False Attrition False BusinessTravel False DailyRate False Department False DistanceFromHome False Education False EducationField False EmployeeCount False EmployeeNumber False EnvironmentSatisfaction False Gender False HourlyRate False JobInvolvement False JobLevel False JobRole False JobSatisfaction False MaritalStatus False MonthlyIncome False MonthlyRate False NumCompaniesWorked False Over18 False OverTime False PercentSalaryHike False PerformanceRating False RelationshipSatisfaction False StandardHours False StockOptionLevel False TotalWorkingYears False TrainingTimesLastYear False WorkLifeBalance False YearsAtCompany False YearsInCurrentRole False YearsSinceLastPromotion False YearsWithCurrManager False dtype: bool
df.isnull().sum()
Age 0 Attrition 0 BusinessTravel 0 DailyRate 0 Department 0 DistanceFromHome 0 Education 0 EducationField 0 EmployeeCount 0 EmployeeNumber 0 EnvironmentSatisfaction 0 Gender 0 HourlyRate 0 JobInvolvement 0 JobLevel 0 JobRole 0 JobSatisfaction 0 MaritalStatus 0 MonthlyIncome 0 MonthlyRate 0 NumCompaniesWorked 0 Over18 0 OverTime 0 PercentSalaryHike 0 PerformanceRating 0 RelationshipSatisfaction 0 StandardHours 0 StockOptionLevel 0 TotalWorkingYears 0 TrainingTimesLastYear 0 WorkLifeBalance 0 YearsAtCompany 0 YearsInCurrentRole 0 YearsSinceLastPromotion 0 YearsWithCurrManager 0 dtype: int64
plt.scatter(df["MonthlyIncome"],df["TotalWorkingYears"])
<matplotlib.collections.PathCollection at 0x151581093d0>
fig=plt.figure(figsize=(18,18))
sns.heatmap(df.corr(),annot=True)
<AxesSubplot:>
sns.pairplot(df)
<seaborn.axisgrid.PairGrid at 0x2039255da30>
sns.boxplot(df["Age"])
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
<AxesSubplot:xlabel='Age'>
sns.boxplot(df["DailyRate"])
C:\ProgramData\Anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
<AxesSubplot:xlabel='DailyRate'>
df.describe()
| Age | DailyRate | DistanceFromHome | Education | EmployeeCount | EmployeeNumber | EnvironmentSatisfaction | HourlyRate | JobInvolvement | JobLevel | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.0 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | ... | 1470.000000 | 1470.0 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 |
| mean | 36.923810 | 802.485714 | 9.192517 | 2.912925 | 1.0 | 1024.865306 | 2.721769 | 65.891156 | 2.729932 | 2.063946 | ... | 2.712245 | 80.0 | 0.793878 | 11.279592 | 2.799320 | 2.761224 | 7.008163 | 4.229252 | 2.187755 | 4.123129 |
| std | 9.135373 | 403.509100 | 8.106864 | 1.024165 | 0.0 | 602.024335 | 1.093082 | 20.329428 | 0.711561 | 1.106940 | ... | 1.081209 | 0.0 | 0.852077 | 7.780782 | 1.289271 | 0.706476 | 6.126525 | 3.623137 | 3.222430 | 3.568136 |
| min | 18.000000 | 102.000000 | 1.000000 | 1.000000 | 1.0 | 1.000000 | 1.000000 | 30.000000 | 1.000000 | 1.000000 | ... | 1.000000 | 80.0 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 30.000000 | 465.000000 | 2.000000 | 2.000000 | 1.0 | 491.250000 | 2.000000 | 48.000000 | 2.000000 | 1.000000 | ... | 2.000000 | 80.0 | 0.000000 | 6.000000 | 2.000000 | 2.000000 | 3.000000 | 2.000000 | 0.000000 | 2.000000 |
| 50% | 36.000000 | 802.000000 | 7.000000 | 3.000000 | 1.0 | 1020.500000 | 3.000000 | 66.000000 | 3.000000 | 2.000000 | ... | 3.000000 | 80.0 | 1.000000 | 10.000000 | 3.000000 | 3.000000 | 5.000000 | 3.000000 | 1.000000 | 3.000000 |
| 75% | 43.000000 | 1157.000000 | 14.000000 | 4.000000 | 1.0 | 1555.750000 | 4.000000 | 83.750000 | 3.000000 | 3.000000 | ... | 4.000000 | 80.0 | 1.000000 | 15.000000 | 3.000000 | 3.000000 | 9.000000 | 7.000000 | 3.000000 | 7.000000 |
| max | 60.000000 | 1499.000000 | 29.000000 | 5.000000 | 1.0 | 2068.000000 | 4.000000 | 100.000000 | 4.000000 | 5.000000 | ... | 4.000000 | 80.0 | 3.000000 | 40.000000 | 6.000000 | 4.000000 | 40.000000 | 18.000000 | 15.000000 | 17.000000 |
8 rows × 26 columns
df.head()
| Age | Attrition | BusinessTravel | DailyRate | Department | DistanceFromHome | Education | EducationField | EmployeeCount | EmployeeNumber | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 41 | Yes | Travel_Rarely | 1102 | Sales | 1 | 2 | Life Sciences | 1 | 1 | ... | 1 | 80 | 0 | 8 | 0 | 1 | 6 | 4 | 0 | 5 |
| 1 | 49 | No | Travel_Frequently | 279 | Research & Development | 8 | 1 | Life Sciences | 1 | 2 | ... | 4 | 80 | 1 | 10 | 3 | 3 | 10 | 7 | 1 | 7 |
| 2 | 37 | Yes | Travel_Rarely | 1373 | Research & Development | 2 | 2 | Other | 1 | 4 | ... | 2 | 80 | 0 | 7 | 3 | 3 | 0 | 0 | 0 | 0 |
| 3 | 33 | No | Travel_Frequently | 1392 | Research & Development | 3 | 4 | Life Sciences | 1 | 5 | ... | 3 | 80 | 0 | 8 | 3 | 3 | 8 | 7 | 3 | 0 |
| 4 | 27 | No | Travel_Rarely | 591 | Research & Development | 2 | 1 | Medical | 1 | 7 | ... | 4 | 80 | 1 | 6 | 3 | 3 | 2 | 2 | 2 | 2 |
5 rows × 35 columns
fig, axes = plt.subplots(2,2)
sns.boxplot(data=df["YearsInCurrentRole"],ax=axes[0,0])
sns.boxplot(data=df["YearsSinceLastPromotion"],ax=axes[0,1])
sns.boxplot(data=df["YearsWithCurrManager"],ax=axes[1,0])
sns.boxplot(data=df["WorkLifeBalance"],ax=axes[1,1])
<AxesSubplot:>
fig, axes = plt.subplots(2,2)
sns.boxplot(data=df["DistanceFromHome"],ax=axes[0,0])
sns.boxplot(data=df["TotalWorkingYears"],ax=axes[0,1])
sns.boxplot(data=df["HourlyRate"],ax=axes[1,0])
sns.boxplot(data=df["YearsAtCompany"],ax=axes[1,1])
<AxesSubplot:>
YearsInCurrentRole_q1 = df.YearsInCurrentRole.quantile(0.25)
YearsInCurrentRole_q3 = df.YearsInCurrentRole.quantile(0.75)
IQR_YearsInCurrentRole=YearsInCurrentRole_q3-YearsInCurrentRole_q1
upperlimit_YearsInCurrentRole=YearsInCurrentRole_q3+1.5*IQR_YearsInCurrentRole
lower_limit_YearsInCurrentRole =YearsInCurrentRole_q1-1.5*IQR_YearsInCurrentRole
median_YearsInCurrentRole=df["YearsInCurrentRole"].median()
df['YearsInCurrentRole'] = np.where(
(df['YearsInCurrentRole'] > upperlimit_YearsInCurrentRole),
median_YearsInCurrentRole,
df['YearsInCurrentRole']
)
YearsSinceLastPromotion_q1 = df.YearsSinceLastPromotion.quantile(0.25)
YearsSinceLastPromotion_q3 = df.YearsSinceLastPromotion.quantile(0.75)
IQR_YearsSinceLastPromotion=YearsSinceLastPromotion_q3-YearsSinceLastPromotion_q1
upperlimit_YearsSinceLastPromotion=YearsSinceLastPromotion_q3+1.5*IQR_YearsSinceLastPromotion
lower_limit_YearsSinceLastPromotion =YearsSinceLastPromotion_q1-1.5*IQR_YearsSinceLastPromotion
median_YearsSinceLastPromotion=df["YearsSinceLastPromotion"].median()
df['YearsSinceLastPromotion'] = np.where(
(df['YearsSinceLastPromotion'] > upperlimit_YearsSinceLastPromotion),
median_YearsSinceLastPromotion,
df['YearsSinceLastPromotion']
)
YearsWithCurrManager_q1 = df.YearsWithCurrManager.quantile(0.25)
YearsWithCurrManager_q3 = df.YearsWithCurrManager.quantile(0.75)
IQR_YearsWithCurrManager=YearsWithCurrManager_q3-YearsWithCurrManager_q1
upperlimit_YearsWithCurrManager=YearsWithCurrManager_q3+1.5*IQR_YearsWithCurrManager
lower_limit_YearsWithCurrManager =YearsWithCurrManager_q1-1.5*IQR_YearsWithCurrManager
median_YearsWithCurrManager=df["YearsWithCurrManager"].median()
df['YearsWithCurrManager'] = np.where(
(df['YearsWithCurrManager'] > upperlimit_YearsWithCurrManager),
median_YearsWithCurrManager,
df['YearsWithCurrManager']
)
TotalWorkingYears_q1 = df.TotalWorkingYears.quantile(0.25)
TotalWorkingYears_q3 = df.TotalWorkingYears.quantile(0.75)
IQR_TotalWorkingYears=TotalWorkingYears_q3-TotalWorkingYears_q1
upperlimit_TotalWorkingYears=TotalWorkingYears_q3+1.5*IQR_TotalWorkingYears
lower_limit_TotalWorkingYears=TotalWorkingYears_q1-1.5*IQR_TotalWorkingYears
median_TotalWorkingYears=df["TotalWorkingYears"].median()
df['TotalWorkingYears'] = np.where(
(df['TotalWorkingYears'] > upperlimit_TotalWorkingYears),
median_TotalWorkingYears,
df['TotalWorkingYears']
)
YearsAtCompany_q1 = df.YearsAtCompany.quantile(0.25)
YearsAtCompany_q3 = df.YearsAtCompany.quantile(0.75)
IQR_YearsAtCompany=YearsAtCompany_q3-YearsAtCompany_q1
upperlimit_YearsAtCompany=YearsAtCompany_q3+1.5*IQR_YearsAtCompany
lower_limit_YearsAtCompany=YearsAtCompany_q1-1.5*IQR_YearsAtCompany
median_YearsAtCompany=df["YearsAtCompany"].median()
df['YearsAtCompany'] = np.where(
(df['YearsAtCompany'] > upperlimit_YearsAtCompany),
median_YearsAtCompany,
df['YearsAtCompany']
)
fig, axes = plt.subplots(2,2)
sns.boxplot(data=df["YearsWithCurrManager"],ax=axes[0,0])
sns.boxplot(data=df["TotalWorkingYears"],ax=axes[0,1])
sns.boxplot(data=df["YearsSinceLastPromotion"],ax=axes[1,0])
sns.boxplot(data=df["YearsAtCompany"],ax=axes[1,1])
<AxesSubplot:>
df.head()
| Age | Attrition | BusinessTravel | DailyRate | Department | DistanceFromHome | Education | EducationField | EmployeeCount | EmployeeNumber | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 41 | Yes | Travel_Rarely | 1102 | Sales | 1 | 2 | Life Sciences | 1 | 1 | ... | 1 | 80 | 0 | 8.0 | 0 | 1 | 6.0 | 4.0 | 0.0 | 5.0 |
| 1 | 49 | No | Travel_Frequently | 279 | Research & Development | 8 | 1 | Life Sciences | 1 | 2 | ... | 4 | 80 | 1 | 10.0 | 3 | 3 | 10.0 | 7.0 | 1.0 | 7.0 |
| 2 | 37 | Yes | Travel_Rarely | 1373 | Research & Development | 2 | 2 | Other | 1 | 4 | ... | 2 | 80 | 0 | 7.0 | 3 | 3 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 33 | No | Travel_Frequently | 1392 | Research & Development | 3 | 4 | Life Sciences | 1 | 5 | ... | 3 | 80 | 0 | 8.0 | 3 | 3 | 8.0 | 7.0 | 3.0 | 0.0 |
| 4 | 27 | No | Travel_Rarely | 591 | Research & Development | 2 | 1 | Medical | 1 | 7 | ... | 4 | 80 | 1 | 6.0 | 3 | 3 | 2.0 | 2.0 | 2.0 | 2.0 |
5 rows × 35 columns
df.head(3)
| Age | Attrition | BusinessTravel | DailyRate | Department | DistanceFromHome | Education | EducationField | EmployeeCount | EmployeeNumber | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 41 | Yes | Travel_Rarely | 1102 | Sales | 1 | 2 | Life Sciences | 1 | 1 | ... | 1 | 80 | 0 | 8.0 | 0 | 1 | 6.0 | 4.0 | 0.0 | 5.0 |
| 1 | 49 | No | Travel_Frequently | 279 | Research & Development | 8 | 1 | Life Sciences | 1 | 2 | ... | 4 | 80 | 1 | 10.0 | 3 | 3 | 10.0 | 7.0 | 1.0 | 7.0 |
| 2 | 37 | Yes | Travel_Rarely | 1373 | Research & Development | 2 | 2 | Other | 1 | 4 | ... | 2 | 80 | 0 | 7.0 | 3 | 3 | 0.0 | 0.0 | 0.0 | 0.0 |
3 rows × 35 columns
df["BusinessTravel"].unique()
array(['Travel_Rarely', 'Travel_Frequently', 'Non-Travel'], dtype=object)
y=df["Attrition"]
y.head()
0 Yes 1 No 2 Yes 3 No 4 No Name: Attrition, dtype: object
df.drop("Attrition",axis=1,inplace=True)
df.head()
| Age | BusinessTravel | DailyRate | Department | DistanceFromHome | Education | EducationField | EmployeeCount | EmployeeNumber | EnvironmentSatisfaction | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 41 | Travel_Rarely | 1102 | Sales | 1 | 2 | Life Sciences | 1 | 1 | 2 | ... | 1 | 80 | 0 | 8.0 | 0 | 1 | 6.0 | 4.0 | 0.0 | 5.0 |
| 1 | 49 | Travel_Frequently | 279 | Research & Development | 8 | 1 | Life Sciences | 1 | 2 | 3 | ... | 4 | 80 | 1 | 10.0 | 3 | 3 | 10.0 | 7.0 | 1.0 | 7.0 |
| 2 | 37 | Travel_Rarely | 1373 | Research & Development | 2 | 2 | Other | 1 | 4 | 4 | ... | 2 | 80 | 0 | 7.0 | 3 | 3 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 33 | Travel_Frequently | 1392 | Research & Development | 3 | 4 | Life Sciences | 1 | 5 | 4 | ... | 3 | 80 | 0 | 8.0 | 3 | 3 | 8.0 | 7.0 | 3.0 | 0.0 |
| 4 | 27 | Travel_Rarely | 591 | Research & Development | 2 | 1 | Medical | 1 | 7 | 1 | ... | 4 | 80 | 1 | 6.0 | 3 | 3 | 2.0 | 2.0 | 2.0 | 2.0 |
5 rows × 34 columns
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
df["BusinessTravel"]=le.fit_transform(df["BusinessTravel"])
df["Department"]=le.fit_transform(df["Department"])
df["Gender"]=le.fit_transform(df["Gender"])
df["EducationField"]=le.fit_transform(df["EducationField"])
df["JobRole"]=le.fit_transform(df["JobRole"])
df["Over18"]=le.fit_transform(df["Over18"])
df["MaritalStatus"]=le.fit_transform(df["MaritalStatus"])
df["OverTime"]=le.fit_transform(df["OverTime"])
y=le.fit_transform(y)
y
array([1, 0, 1, ..., 0, 0, 0])
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1470 entries, 0 to 1469 Data columns (total 34 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Age 1470 non-null int64 1 BusinessTravel 1470 non-null int32 2 DailyRate 1470 non-null int64 3 Department 1470 non-null int32 4 DistanceFromHome 1470 non-null int64 5 Education 1470 non-null int64 6 EducationField 1470 non-null int32 7 EmployeeCount 1470 non-null int64 8 EmployeeNumber 1470 non-null int64 9 EnvironmentSatisfaction 1470 non-null int64 10 Gender 1470 non-null int32 11 HourlyRate 1470 non-null int64 12 JobInvolvement 1470 non-null int64 13 JobLevel 1470 non-null int64 14 JobRole 1470 non-null int32 15 JobSatisfaction 1470 non-null int64 16 MaritalStatus 1470 non-null int32 17 MonthlyIncome 1470 non-null int64 18 MonthlyRate 1470 non-null int64 19 NumCompaniesWorked 1470 non-null int64 20 Over18 1470 non-null int32 21 OverTime 1470 non-null int32 22 PercentSalaryHike 1470 non-null int64 23 PerformanceRating 1470 non-null int64 24 RelationshipSatisfaction 1470 non-null int64 25 StandardHours 1470 non-null int64 26 StockOptionLevel 1470 non-null int64 27 TotalWorkingYears 1470 non-null float64 28 TrainingTimesLastYear 1470 non-null int64 29 WorkLifeBalance 1470 non-null int64 30 YearsAtCompany 1470 non-null float64 31 YearsInCurrentRole 1470 non-null float64 32 YearsSinceLastPromotion 1470 non-null float64 33 YearsWithCurrManager 1470 non-null float64 dtypes: float64(5), int32(8), int64(21) memory usage: 344.7 KB
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(df,y,test_size=0.3,random_state=0)
x_train.shape,x_test.shape,y_train.shape,y_test.shape
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
x_train=sc.fit_transform(x_train)
x_test=sc.fit_transform(x_test)
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(x_train,y_train)
LinearRegression()
lr.coef_ #slope(m)
array([-3.55262827e-02, -2.37407707e-04, -1.74873966e-02, 3.46888242e-02,
2.45233228e-02, 3.99144884e-03, 8.39533719e-03, -2.42861287e-17,
-9.39830247e-03, -4.17783940e-02, 1.06780211e-02, -2.87784724e-03,
-3.84268092e-02, -1.47199505e-02, -1.60663577e-02, -3.61928591e-02,
3.34188778e-02, -5.71740448e-03, 6.18906010e-03, 3.77289197e-02,
3.46944695e-18, 9.53743908e-02, -2.53410326e-02, 2.01496333e-02,
-2.62974085e-02, 8.67361738e-18, -1.79892008e-02, -3.34590962e-02,
-1.15608002e-02, -3.12038906e-02, -2.45971395e-02, -1.10525106e-02,
2.11441127e-02, -6.61780458e-03])
lr.intercept_ #(c)
0.16229348882410102
y_pred = lr.predict(x_test)
y_pred
array([ 1.37918641e-01, 2.09645849e-01, 3.46618943e-01, 2.45299790e-02,
4.91189780e-01, 1.07950558e-01, 3.49717420e-01, 1.21303531e-01,
-1.68147526e-01, 3.93018819e-01, 1.48787705e-01, 2.71370382e-01,
-4.98437641e-02, 5.48001308e-01, 2.86732415e-01, 4.53384865e-03,
1.74257075e-01, 2.68615466e-01, 1.12381772e-01, 2.26443397e-01,
2.55690323e-01, 2.22170432e-02, 8.78773433e-02, 8.72934800e-02,
5.08104191e-01, 3.11558299e-01, 7.03725249e-02, 1.17087782e-01,
5.06787989e-01, 8.20414681e-02, -6.12800117e-02, 8.41105057e-03,
1.12903985e-01, 3.71456509e-01, 1.33899976e-01, 5.85122218e-02,
1.05463899e-01, 6.95765731e-02, 7.17231699e-02, 4.15928776e-02,
-1.24566180e-03, -2.39972238e-02, 5.76284569e-02, -2.34246953e-02,
-1.61110397e-02, 4.10202033e-01, 3.71234309e-01, -2.24739135e-01,
5.53693372e-01, 4.44700812e-01, 1.90332791e-01, 4.38757235e-01,
1.52737499e-01, 3.73079045e-01, 4.85209154e-01, 3.11395580e-01,
-5.65225335e-02, 3.32738373e-01, -3.80688401e-03, 2.51900892e-01,
-4.03385133e-02, 2.75634666e-01, 1.06239093e-01, 1.17977131e-01,
3.63620997e-01, 4.34148476e-02, 3.48886727e-01, 2.01030078e-01,
1.19780620e-01, 1.38006610e-01, -2.25349517e-02, 3.22513863e-01,
1.01833064e-01, 1.18709823e-01, 2.27909720e-01, 9.99558092e-02,
8.28180867e-02, 2.65059177e-01, 2.43214907e-01, 3.76528510e-02,
-9.24254557e-02, 6.46181810e-03, 1.92462755e-01, -3.05841005e-02,
-1.40455031e-02, 1.34476082e-01, 8.92059386e-02, -6.61582368e-03,
4.02381694e-02, 2.32432982e-01, 3.18410433e-01, 2.29919400e-01,
3.27216780e-01, 2.29679016e-01, -1.46541152e-02, 2.17063917e-01,
3.13689040e-01, 2.86177894e-01, 1.03533925e-01, 8.04842696e-02,
2.90981288e-01, 5.03285315e-01, 3.11425236e-01, -1.38793211e-02,
1.82884007e-01, -2.53921300e-03, 2.28176707e-02, 2.04033083e-01,
5.43003901e-02, 1.73910088e-01, 1.13059246e-01, 1.13836309e-01,
-2.70056312e-02, 2.14992354e-01, 1.00381160e-01, 2.36130104e-02,
1.01496763e-01, 2.25105091e-01, -9.29326222e-02, -8.61589045e-02,
2.04150313e-01, 2.67642579e-02, 1.20263286e-01, 5.93653681e-01,
-1.48892411e-03, -2.01916718e-02, -1.56507323e-01, 2.10127618e-01,
2.67716429e-01, 1.85389830e-01, -1.12033628e-02, 2.52651114e-01,
4.48468139e-01, 3.99840697e-01, 1.60877924e-01, 4.22509756e-01,
4.89913325e-01, 2.05107189e-01, 1.56726910e-01, 3.57756330e-01,
2.37737652e-01, 1.35712319e-01, 2.22899563e-01, 2.59497301e-01,
3.10183037e-01, -8.06057033e-03, 1.42038791e-01, -1.16301748e-01,
1.97902260e-01, 2.91520990e-01, 1.22100107e-01, 2.81323674e-01,
6.52896078e-02, 3.39738140e-01, 1.65408896e-01, -7.56481268e-03,
-4.82094705e-02, 1.32625459e-01, -9.98670581e-02, -5.23898017e-02,
3.27712556e-01, -1.05424413e-01, 2.16434375e-01, 6.13565280e-01,
1.96521473e-01, 2.94724697e-01, 1.98483998e-01, 1.76233456e-01,
-1.14073476e-02, -9.60119085e-02, 4.01142821e-02, 1.35420552e-01,
1.62534216e-01, 1.60623349e-01, -6.71778220e-02, 1.78553462e-01,
2.04800599e-01, 1.76893302e-01, 1.06730175e-01, 1.91246161e-01,
-3.26191911e-05, 1.99350802e-01, -5.69302861e-02, 5.76621098e-01,
5.77826155e-02, 3.18591034e-02, 3.28758832e-01, 1.01889009e-01,
5.43706300e-01, 1.37548853e-01, 3.61712844e-01, 3.67418627e-01,
2.51648697e-01, 5.17733543e-02, 1.31970977e-01, 2.79151681e-01,
8.27344489e-02, -6.43817945e-02, 3.30567457e-01, 8.74831815e-02,
2.13603580e-01, 2.42469911e-01, 4.88323716e-01, 1.27341546e-01,
2.93931396e-01, 4.50202197e-02, 4.44237196e-01, -7.50836221e-02,
2.45049642e-01, 2.59853132e-02, 1.64636165e-01, 2.23411213e-01,
8.96915787e-02, 1.36299284e-01, 2.16698333e-01, -2.32429846e-02,
4.26676675e-02, 1.12509555e-01, 3.89291623e-02, 2.25071403e-01,
1.66095421e-01, 2.58703279e-01, 5.56734590e-01, 1.32324109e-01,
3.79943349e-01, -7.78502127e-02, 1.32771119e-01, 2.95885065e-01,
3.11610191e-01, 2.38774333e-02, -2.87603208e-02, 3.42449064e-01,
9.38966786e-02, -3.73148651e-02, 2.59431954e-01, 3.35895588e-01,
3.02976848e-01, -1.35134138e-01, 1.47986632e-01, -1.31579916e-02,
2.21514928e-01, 2.90795587e-01, 6.43228111e-02, 3.00834651e-01,
9.04028941e-02, 3.26479488e-02, 4.09470190e-01, 3.36945098e-01,
-6.21240394e-03, 2.64118087e-01, 3.19933907e-01, 4.07156886e-01,
5.57242720e-01, 7.42079617e-02, 2.50216028e-01, 9.67353086e-02,
2.94052763e-03, 5.18640348e-01, 3.50958952e-01, 3.48924520e-01,
3.81255167e-01, 4.17423927e-02, 2.56503632e-01, 1.32127623e-01,
1.51070283e-01, 1.28369101e-01, -1.45942851e-01, 3.00966858e-01,
4.09263274e-01, 1.13937193e-01, 1.74136119e-01, -3.49558918e-02,
1.98685542e-01, 7.35379514e-02, -3.84636560e-02, 2.85843168e-02,
1.69538531e-01, 3.41932238e-01, 3.26610519e-01, 2.83381200e-01,
2.95856657e-01, -8.22827724e-02, 1.87675462e-01, 2.03101266e-01,
1.91131256e-01, 2.43417404e-01, -1.27626081e-01, 2.86046246e-01,
-1.14788018e-01, 2.43450787e-02, 2.88057243e-01, 6.02644834e-01,
1.10463429e-01, 2.89266212e-01, 2.40873066e-01, 1.70014615e-04,
3.09904672e-01, 2.49547375e-01, 5.55319180e-01, -9.23969865e-02,
2.19533466e-01, 1.38736979e-01, 1.74093862e-01, 1.76093835e-02,
9.59684557e-02, 1.21305954e-01, 2.11989195e-01, 3.18357361e-01,
-1.91942403e-01, -1.09916872e-01, 4.22249476e-02, -1.47262799e-02,
2.00126706e-01, 2.77248747e-01, 2.34014277e-01, 2.82243223e-01,
2.27445018e-01, 8.57463446e-02, 1.39235767e-01, -1.73108233e-02,
2.84825900e-01, 1.39770403e-01, 4.03641602e-01, 3.31387232e-01,
1.83287506e-01, 1.30231898e-01, 3.16555776e-01, 3.07328881e-01,
-1.75303209e-01, 1.11986750e-01, -4.11000700e-03, 3.38159249e-01,
1.29119463e-01, 3.74684025e-01, 2.36238514e-01, -1.74420911e-01,
2.46628095e-01, -2.38125014e-01, 3.40788524e-01, 1.42078584e-01,
1.20626299e-01, 4.74657095e-01, 1.51469009e-01, 4.22308868e-01,
1.21407466e-01, 2.63311223e-03, 2.38771976e-01, 1.29182910e-01,
2.77954845e-02, 2.40992288e-01, 6.29589529e-02, 3.47539889e-01,
1.62946868e-01, 9.27652168e-02, 2.35579026e-01, -6.89476906e-02,
1.98088258e-01, 1.82300453e-01, -6.65824771e-03, 2.56155754e-01,
-2.75649748e-01, -3.29450143e-02, 1.90789095e-01, 8.02511366e-03,
2.00921432e-01, 2.08561946e-01, -1.17423002e-01, 3.54229336e-01,
6.31247760e-02, 2.38781019e-01, -8.99468795e-02, 3.13376697e-01,
9.58905226e-03, 2.28633569e-01, 3.19904288e-01, 1.16810259e-01,
1.48764263e-03, 9.91628800e-02, 4.74273794e-01, -3.72096272e-02,
3.16976188e-01, 1.26886446e-02, 1.77450592e-01, 2.06779853e-01,
-2.10325212e-01, 2.68332256e-01, -1.60370623e-01, 3.48152998e-01,
-9.75562664e-02, 2.20673567e-01, 2.41867569e-02, 2.02017809e-01,
9.91156987e-02, 3.17738842e-01, 4.25496849e-01, -3.91468058e-03,
8.23016682e-02, 3.11627766e-01, 3.58324118e-02, 1.13365997e-01,
4.18691609e-01, 1.55177410e-01, 5.28515497e-02, 3.81322460e-02,
9.03487898e-02, -6.09750134e-02, 3.40047149e-01, 1.23447452e-01,
-5.01971894e-02, -5.44905815e-02, 3.13940849e-01, 1.06590752e-01,
2.12005593e-01, 4.07289132e-01, 3.37128902e-01, 1.77987670e-01,
2.85970655e-01, 2.95978933e-01, 2.54161818e-01, -1.24732176e-02,
2.27055783e-01, 1.50099453e-01, 6.67083452e-02, -1.11654341e-03,
9.65185224e-03, 8.27349134e-02, 1.25842708e-01, 1.79918627e-01,
2.52422294e-01, -1.92117486e-01, 3.05169765e-01, 1.89746681e-01,
-9.76959503e-02, 5.20083323e-02, 1.53887950e-01, 1.63938425e-01,
1.66673090e-01, 2.19125074e-01, 2.26055268e-01, 1.07251041e-01,
-7.61484587e-02])
y_test
array([0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0])
from sklearn.linear_model import LogisticRegression
lg=LogisticRegression()
lg.fit(x_train,y_train)
LogisticRegression()
y_pred_lg=lg.predict(x_test)
y_pred
array([ 1.37918641e-01, 2.09645849e-01, 3.46618943e-01, 2.45299790e-02,
4.91189780e-01, 1.07950558e-01, 3.49717420e-01, 1.21303531e-01,
-1.68147526e-01, 3.93018819e-01, 1.48787705e-01, 2.71370382e-01,
-4.98437641e-02, 5.48001308e-01, 2.86732415e-01, 4.53384865e-03,
1.74257075e-01, 2.68615466e-01, 1.12381772e-01, 2.26443397e-01,
2.55690323e-01, 2.22170432e-02, 8.78773433e-02, 8.72934800e-02,
5.08104191e-01, 3.11558299e-01, 7.03725249e-02, 1.17087782e-01,
5.06787989e-01, 8.20414681e-02, -6.12800117e-02, 8.41105057e-03,
1.12903985e-01, 3.71456509e-01, 1.33899976e-01, 5.85122218e-02,
1.05463899e-01, 6.95765731e-02, 7.17231699e-02, 4.15928776e-02,
-1.24566180e-03, -2.39972238e-02, 5.76284569e-02, -2.34246953e-02,
-1.61110397e-02, 4.10202033e-01, 3.71234309e-01, -2.24739135e-01,
5.53693372e-01, 4.44700812e-01, 1.90332791e-01, 4.38757235e-01,
1.52737499e-01, 3.73079045e-01, 4.85209154e-01, 3.11395580e-01,
-5.65225335e-02, 3.32738373e-01, -3.80688401e-03, 2.51900892e-01,
-4.03385133e-02, 2.75634666e-01, 1.06239093e-01, 1.17977131e-01,
3.63620997e-01, 4.34148476e-02, 3.48886727e-01, 2.01030078e-01,
1.19780620e-01, 1.38006610e-01, -2.25349517e-02, 3.22513863e-01,
1.01833064e-01, 1.18709823e-01, 2.27909720e-01, 9.99558092e-02,
8.28180867e-02, 2.65059177e-01, 2.43214907e-01, 3.76528510e-02,
-9.24254557e-02, 6.46181810e-03, 1.92462755e-01, -3.05841005e-02,
-1.40455031e-02, 1.34476082e-01, 8.92059386e-02, -6.61582368e-03,
4.02381694e-02, 2.32432982e-01, 3.18410433e-01, 2.29919400e-01,
3.27216780e-01, 2.29679016e-01, -1.46541152e-02, 2.17063917e-01,
3.13689040e-01, 2.86177894e-01, 1.03533925e-01, 8.04842696e-02,
2.90981288e-01, 5.03285315e-01, 3.11425236e-01, -1.38793211e-02,
1.82884007e-01, -2.53921300e-03, 2.28176707e-02, 2.04033083e-01,
5.43003901e-02, 1.73910088e-01, 1.13059246e-01, 1.13836309e-01,
-2.70056312e-02, 2.14992354e-01, 1.00381160e-01, 2.36130104e-02,
1.01496763e-01, 2.25105091e-01, -9.29326222e-02, -8.61589045e-02,
2.04150313e-01, 2.67642579e-02, 1.20263286e-01, 5.93653681e-01,
-1.48892411e-03, -2.01916718e-02, -1.56507323e-01, 2.10127618e-01,
2.67716429e-01, 1.85389830e-01, -1.12033628e-02, 2.52651114e-01,
4.48468139e-01, 3.99840697e-01, 1.60877924e-01, 4.22509756e-01,
4.89913325e-01, 2.05107189e-01, 1.56726910e-01, 3.57756330e-01,
2.37737652e-01, 1.35712319e-01, 2.22899563e-01, 2.59497301e-01,
3.10183037e-01, -8.06057033e-03, 1.42038791e-01, -1.16301748e-01,
1.97902260e-01, 2.91520990e-01, 1.22100107e-01, 2.81323674e-01,
6.52896078e-02, 3.39738140e-01, 1.65408896e-01, -7.56481268e-03,
-4.82094705e-02, 1.32625459e-01, -9.98670581e-02, -5.23898017e-02,
3.27712556e-01, -1.05424413e-01, 2.16434375e-01, 6.13565280e-01,
1.96521473e-01, 2.94724697e-01, 1.98483998e-01, 1.76233456e-01,
-1.14073476e-02, -9.60119085e-02, 4.01142821e-02, 1.35420552e-01,
1.62534216e-01, 1.60623349e-01, -6.71778220e-02, 1.78553462e-01,
2.04800599e-01, 1.76893302e-01, 1.06730175e-01, 1.91246161e-01,
-3.26191911e-05, 1.99350802e-01, -5.69302861e-02, 5.76621098e-01,
5.77826155e-02, 3.18591034e-02, 3.28758832e-01, 1.01889009e-01,
5.43706300e-01, 1.37548853e-01, 3.61712844e-01, 3.67418627e-01,
2.51648697e-01, 5.17733543e-02, 1.31970977e-01, 2.79151681e-01,
8.27344489e-02, -6.43817945e-02, 3.30567457e-01, 8.74831815e-02,
2.13603580e-01, 2.42469911e-01, 4.88323716e-01, 1.27341546e-01,
2.93931396e-01, 4.50202197e-02, 4.44237196e-01, -7.50836221e-02,
2.45049642e-01, 2.59853132e-02, 1.64636165e-01, 2.23411213e-01,
8.96915787e-02, 1.36299284e-01, 2.16698333e-01, -2.32429846e-02,
4.26676675e-02, 1.12509555e-01, 3.89291623e-02, 2.25071403e-01,
1.66095421e-01, 2.58703279e-01, 5.56734590e-01, 1.32324109e-01,
3.79943349e-01, -7.78502127e-02, 1.32771119e-01, 2.95885065e-01,
3.11610191e-01, 2.38774333e-02, -2.87603208e-02, 3.42449064e-01,
9.38966786e-02, -3.73148651e-02, 2.59431954e-01, 3.35895588e-01,
3.02976848e-01, -1.35134138e-01, 1.47986632e-01, -1.31579916e-02,
2.21514928e-01, 2.90795587e-01, 6.43228111e-02, 3.00834651e-01,
9.04028941e-02, 3.26479488e-02, 4.09470190e-01, 3.36945098e-01,
-6.21240394e-03, 2.64118087e-01, 3.19933907e-01, 4.07156886e-01,
5.57242720e-01, 7.42079617e-02, 2.50216028e-01, 9.67353086e-02,
2.94052763e-03, 5.18640348e-01, 3.50958952e-01, 3.48924520e-01,
3.81255167e-01, 4.17423927e-02, 2.56503632e-01, 1.32127623e-01,
1.51070283e-01, 1.28369101e-01, -1.45942851e-01, 3.00966858e-01,
4.09263274e-01, 1.13937193e-01, 1.74136119e-01, -3.49558918e-02,
1.98685542e-01, 7.35379514e-02, -3.84636560e-02, 2.85843168e-02,
1.69538531e-01, 3.41932238e-01, 3.26610519e-01, 2.83381200e-01,
2.95856657e-01, -8.22827724e-02, 1.87675462e-01, 2.03101266e-01,
1.91131256e-01, 2.43417404e-01, -1.27626081e-01, 2.86046246e-01,
-1.14788018e-01, 2.43450787e-02, 2.88057243e-01, 6.02644834e-01,
1.10463429e-01, 2.89266212e-01, 2.40873066e-01, 1.70014615e-04,
3.09904672e-01, 2.49547375e-01, 5.55319180e-01, -9.23969865e-02,
2.19533466e-01, 1.38736979e-01, 1.74093862e-01, 1.76093835e-02,
9.59684557e-02, 1.21305954e-01, 2.11989195e-01, 3.18357361e-01,
-1.91942403e-01, -1.09916872e-01, 4.22249476e-02, -1.47262799e-02,
2.00126706e-01, 2.77248747e-01, 2.34014277e-01, 2.82243223e-01,
2.27445018e-01, 8.57463446e-02, 1.39235767e-01, -1.73108233e-02,
2.84825900e-01, 1.39770403e-01, 4.03641602e-01, 3.31387232e-01,
1.83287506e-01, 1.30231898e-01, 3.16555776e-01, 3.07328881e-01,
-1.75303209e-01, 1.11986750e-01, -4.11000700e-03, 3.38159249e-01,
1.29119463e-01, 3.74684025e-01, 2.36238514e-01, -1.74420911e-01,
2.46628095e-01, -2.38125014e-01, 3.40788524e-01, 1.42078584e-01,
1.20626299e-01, 4.74657095e-01, 1.51469009e-01, 4.22308868e-01,
1.21407466e-01, 2.63311223e-03, 2.38771976e-01, 1.29182910e-01,
2.77954845e-02, 2.40992288e-01, 6.29589529e-02, 3.47539889e-01,
1.62946868e-01, 9.27652168e-02, 2.35579026e-01, -6.89476906e-02,
1.98088258e-01, 1.82300453e-01, -6.65824771e-03, 2.56155754e-01,
-2.75649748e-01, -3.29450143e-02, 1.90789095e-01, 8.02511366e-03,
2.00921432e-01, 2.08561946e-01, -1.17423002e-01, 3.54229336e-01,
6.31247760e-02, 2.38781019e-01, -8.99468795e-02, 3.13376697e-01,
9.58905226e-03, 2.28633569e-01, 3.19904288e-01, 1.16810259e-01,
1.48764263e-03, 9.91628800e-02, 4.74273794e-01, -3.72096272e-02,
3.16976188e-01, 1.26886446e-02, 1.77450592e-01, 2.06779853e-01,
-2.10325212e-01, 2.68332256e-01, -1.60370623e-01, 3.48152998e-01,
-9.75562664e-02, 2.20673567e-01, 2.41867569e-02, 2.02017809e-01,
9.91156987e-02, 3.17738842e-01, 4.25496849e-01, -3.91468058e-03,
8.23016682e-02, 3.11627766e-01, 3.58324118e-02, 1.13365997e-01,
4.18691609e-01, 1.55177410e-01, 5.28515497e-02, 3.81322460e-02,
9.03487898e-02, -6.09750134e-02, 3.40047149e-01, 1.23447452e-01,
-5.01971894e-02, -5.44905815e-02, 3.13940849e-01, 1.06590752e-01,
2.12005593e-01, 4.07289132e-01, 3.37128902e-01, 1.77987670e-01,
2.85970655e-01, 2.95978933e-01, 2.54161818e-01, -1.24732176e-02,
2.27055783e-01, 1.50099453e-01, 6.67083452e-02, -1.11654341e-03,
9.65185224e-03, 8.27349134e-02, 1.25842708e-01, 1.79918627e-01,
2.52422294e-01, -1.92117486e-01, 3.05169765e-01, 1.89746681e-01,
-9.76959503e-02, 5.20083323e-02, 1.53887950e-01, 1.63938425e-01,
1.66673090e-01, 2.19125074e-01, 2.26055268e-01, 1.07251041e-01,
-7.61484587e-02])
y_test
array([0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0])
score = lg.score(x_test, y_test)
print(score)
0.8798185941043084
from sklearn import metrics
cm = metrics.confusion_matrix(y_test,y_pred_lg)
print(cm)
[[366 5] [ 48 22]]
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
rg=Ridge()
parametres={"alpha":[1,2,3,5,10,20,30,40,60,70,80,90]}
ridgecv=GridSearchCV(rg,parametres,scoring="neg_mean_squared_error",cv=5)
ridgecv.fit(x_train,y_train)
GridSearchCV(cv=5, estimator=Ridge(),
param_grid={'alpha': [1, 2, 3, 5, 10, 20, 30, 40, 60, 70, 80, 90]},
scoring='neg_mean_squared_error')
print(ridgecv.best_params_)
{'alpha': 90}
print(ridgecv.best_score_)
-0.11419243259092107
y_pred_rg=ridgecv.predict(x_test)
y_pred_rg
array([ 0.14109504, 0.2151691 , 0.34173459, 0.02124446, 0.47719499,
0.12198146, 0.33497545, 0.12490299, -0.14125808, 0.3692482 ,
0.13729583, 0.27361383, -0.02917434, 0.51620367, 0.27207331,
0.01824115, 0.17794412, 0.27065952, 0.10815569, 0.21977899,
0.26084061, 0.01554811, 0.09134938, 0.09751136, 0.48600942,
0.2984626 , 0.08057128, 0.11513836, 0.48356208, 0.09103569,
-0.05484891, 0.0285067 , 0.1129696 , 0.34701084, 0.13094928,
0.07485085, 0.10555091, 0.07907614, 0.07969639, 0.0544783 ,
0.02060619, -0.00183533, 0.06301744, -0.02236072, -0.00278479,
0.4010434 , 0.34728611, -0.1927277 , 0.5346335 , 0.43130962,
0.18938795, 0.42180993, 0.14687555, 0.34970057, 0.46348416,
0.30365578, -0.04035124, 0.31939699, 0.01371085, 0.24378973,
-0.0217235 , 0.26753241, 0.10121524, 0.11606132, 0.35360748,
0.05169705, 0.33404147, 0.20046454, 0.12205985, 0.14961595,
-0.01793525, 0.30910146, 0.10659273, 0.12404783, 0.21723975,
0.09619168, 0.09223462, 0.2527774 , 0.2434066 , 0.04302603,
-0.08068771, 0.00885581, 0.20277265, -0.01128223, -0.00268362,
0.14391232, 0.08523364, 0.01697099, 0.03807104, 0.22447627,
0.31999025, 0.20633618, 0.31169428, 0.22545957, -0.00800339,
0.21615505, 0.305808 , 0.28297317, 0.12410829, 0.08696459,
0.2900393 , 0.47871592, 0.29513287, -0.00137468, 0.17541262,
0.01996838, 0.03109392, 0.20499566, 0.06969531, 0.1761237 ,
0.12015783, 0.11231266, -0.01476375, 0.20271386, 0.11962843,
0.03915193, 0.10499323, 0.21931398, -0.07698367, -0.0665132 ,
0.20651982, 0.04056639, 0.12416724, 0.55826367, 0.01757054,
-0.02493194, -0.12329801, 0.21009741, 0.25273881, 0.1816946 ,
-0.007511 , 0.25130851, 0.42754037, 0.37456929, 0.16190887,
0.40702053, 0.46666191, 0.20114426, 0.15384418, 0.33898499,
0.2307789 , 0.13230542, 0.20655353, 0.25011455, 0.29662454,
0.0025485 , 0.13266836, -0.09721403, 0.18811579, 0.28139863,
0.12732109, 0.26524036, 0.07725868, 0.30688664, 0.14447971,
0.013183 , -0.03942657, 0.14270859, -0.08627402, -0.03421625,
0.30213893, -0.08917788, 0.22285798, 0.58093918, 0.19171308,
0.29370047, 0.19720025, 0.17711461, 0.01103987, -0.08010859,
0.0436271 , 0.14234274, 0.17426814, 0.16266974, -0.05060372,
0.19175571, 0.20196328, 0.16857596, 0.10443308, 0.18311543,
0.01763848, 0.18615567, -0.04875274, 0.53825218, 0.05848671,
0.04679565, 0.31562785, 0.10181854, 0.5181803 , 0.14911037,
0.35414888, 0.35942839, 0.24117501, 0.07417005, 0.13916591,
0.26529946, 0.08928299, -0.05456868, 0.32101304, 0.0938359 ,
0.21039068, 0.24567504, 0.46506683, 0.11831966, 0.27722024,
0.06021621, 0.43127136, -0.0715464 , 0.22663568, 0.03272084,
0.15878571, 0.21849593, 0.09466811, 0.14378006, 0.217633 ,
-0.00639564, 0.02370049, 0.12724414, 0.04452184, 0.23269135,
0.15163962, 0.26413639, 0.53610729, 0.13078083, 0.37489765,
-0.05698025, 0.13745969, 0.28896174, 0.31270411, 0.03233224,
-0.01940844, 0.33060856, 0.10514329, -0.02133009, 0.24376887,
0.32185562, 0.2856154 , -0.10845687, 0.13873432, 0.00149298,
0.21956768, 0.28718085, 0.05868158, 0.29284815, 0.07614951,
0.04634742, 0.38159732, 0.33207598, -0.00388774, 0.2530142 ,
0.31612267, 0.38100539, 0.52909583, 0.0806722 , 0.25128497,
0.09521239, 0.00534904, 0.49269236, 0.32856155, 0.32696873,
0.36064074, 0.0546294 , 0.2509809 , 0.12967062, 0.15599954,
0.13963264, -0.12393534, 0.29273719, 0.37976292, 0.12581364,
0.18158315, -0.02695603, 0.20519083, 0.07355053, -0.01322105,
0.03425304, 0.16070607, 0.33705457, 0.30392952, 0.27480431,
0.28689683, -0.05820897, 0.19339178, 0.18979934, 0.17251068,
0.24901993, -0.10436346, 0.27491983, -0.09879457, 0.040643 ,
0.28533901, 0.56457591, 0.11608525, 0.28829086, 0.22340771,
0.01709513, 0.29241833, 0.24239142, 0.53541937, -0.07385175,
0.21711996, 0.13866988, 0.18275949, 0.03028679, 0.10956063,
0.11162148, 0.22370113, 0.29819573, -0.16202074, -0.08241599,
0.05195339, 0.00784795, 0.19928314, 0.25909829, 0.23955218,
0.28358317, 0.21996076, 0.09566008, 0.14599491, 0.0009498 ,
0.2835304 , 0.1299209 , 0.38921339, 0.32701586, 0.1721737 ,
0.12858167, 0.30896035, 0.30617785, -0.15816062, 0.12752759,
0.01243085, 0.33533922, 0.12879927, 0.36453542, 0.23508022,
-0.1466139 , 0.24639121, -0.20667964, 0.32581433, 0.13947642,
0.13158952, 0.45137318, 0.15420176, 0.40161435, 0.1157631 ,
0.02294896, 0.23434361, 0.11256453, 0.03310285, 0.24816994,
0.07408071, 0.33903586, 0.1647869 , 0.09296063, 0.23518843,
-0.05757715, 0.19073196, 0.19502373, -0.01113003, 0.25491531,
-0.2560317 , -0.02828648, 0.18158068, 0.01729974, 0.19472761,
0.20526791, -0.09548153, 0.33564205, 0.05435555, 0.23089882,
-0.08447158, 0.29845764, 0.03030467, 0.22037718, 0.30734621,
0.09957496, 0.00639825, 0.10024417, 0.4510534 , -0.02591716,
0.3022535 , 0.03001962, 0.17603079, 0.20227153, -0.17865485,
0.27503145, -0.1464023 , 0.33459748, -0.07585477, 0.22048685,
0.03687081, 0.18969765, 0.1110831 , 0.31271761, 0.41453678,
0.01800242, 0.08654666, 0.29966409, 0.04754104, 0.11777157,
0.4048086 , 0.15418205, 0.04629587, 0.04189527, 0.10132682,
-0.0504673 , 0.3235537 , 0.12502821, -0.01975565, -0.03378431,
0.29185542, 0.10341047, 0.21469468, 0.38576482, 0.31906509,
0.17836402, 0.27860474, 0.29086816, 0.24224872, 0.00283967,
0.22596665, 0.14805123, 0.06871467, 0.00684606, 0.02231177,
0.09735064, 0.12025689, 0.19786736, 0.24142977, -0.17153291,
0.30182055, 0.19593933, -0.07165104, 0.05909201, 0.15645822,
0.16625188, 0.16235541, 0.21541469, 0.22503392, 0.11917496,
-0.05708581])
y_test
array([0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0])
from sklearn import metrics
print(metrics.r2_score(y_test,y_pred_rg))
print(metrics.r2_score(y_train,ridgecv.predict(x_train)))
0.21120590171934883 0.20665483137162655
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
la=Ridge()
parametres={"alpha":[1,2,3,5,10,20,30,40,60,70,80,90]}
ridgecv=GridSearchCV(la,parametres,scoring="neg_mean_squared_error",cv=5)
ridgecv.fit(x_train,y_train)
GridSearchCV(cv=5, estimator=Ridge(),
param_grid={'alpha': [1, 2, 3, 5, 10, 20, 30, 40, 60, 70, 80, 90]},
scoring='neg_mean_squared_error')
print(ridgecv.best_params_)
{'alpha': 90}
print(ridgecv.best_score_)
-0.11419243259092107
y_pred_la=ridgecv.predict(x_test)
y_pred_la
array([ 0.14109504, 0.2151691 , 0.34173459, 0.02124446, 0.47719499,
0.12198146, 0.33497545, 0.12490299, -0.14125808, 0.3692482 ,
0.13729583, 0.27361383, -0.02917434, 0.51620367, 0.27207331,
0.01824115, 0.17794412, 0.27065952, 0.10815569, 0.21977899,
0.26084061, 0.01554811, 0.09134938, 0.09751136, 0.48600942,
0.2984626 , 0.08057128, 0.11513836, 0.48356208, 0.09103569,
-0.05484891, 0.0285067 , 0.1129696 , 0.34701084, 0.13094928,
0.07485085, 0.10555091, 0.07907614, 0.07969639, 0.0544783 ,
0.02060619, -0.00183533, 0.06301744, -0.02236072, -0.00278479,
0.4010434 , 0.34728611, -0.1927277 , 0.5346335 , 0.43130962,
0.18938795, 0.42180993, 0.14687555, 0.34970057, 0.46348416,
0.30365578, -0.04035124, 0.31939699, 0.01371085, 0.24378973,
-0.0217235 , 0.26753241, 0.10121524, 0.11606132, 0.35360748,
0.05169705, 0.33404147, 0.20046454, 0.12205985, 0.14961595,
-0.01793525, 0.30910146, 0.10659273, 0.12404783, 0.21723975,
0.09619168, 0.09223462, 0.2527774 , 0.2434066 , 0.04302603,
-0.08068771, 0.00885581, 0.20277265, -0.01128223, -0.00268362,
0.14391232, 0.08523364, 0.01697099, 0.03807104, 0.22447627,
0.31999025, 0.20633618, 0.31169428, 0.22545957, -0.00800339,
0.21615505, 0.305808 , 0.28297317, 0.12410829, 0.08696459,
0.2900393 , 0.47871592, 0.29513287, -0.00137468, 0.17541262,
0.01996838, 0.03109392, 0.20499566, 0.06969531, 0.1761237 ,
0.12015783, 0.11231266, -0.01476375, 0.20271386, 0.11962843,
0.03915193, 0.10499323, 0.21931398, -0.07698367, -0.0665132 ,
0.20651982, 0.04056639, 0.12416724, 0.55826367, 0.01757054,
-0.02493194, -0.12329801, 0.21009741, 0.25273881, 0.1816946 ,
-0.007511 , 0.25130851, 0.42754037, 0.37456929, 0.16190887,
0.40702053, 0.46666191, 0.20114426, 0.15384418, 0.33898499,
0.2307789 , 0.13230542, 0.20655353, 0.25011455, 0.29662454,
0.0025485 , 0.13266836, -0.09721403, 0.18811579, 0.28139863,
0.12732109, 0.26524036, 0.07725868, 0.30688664, 0.14447971,
0.013183 , -0.03942657, 0.14270859, -0.08627402, -0.03421625,
0.30213893, -0.08917788, 0.22285798, 0.58093918, 0.19171308,
0.29370047, 0.19720025, 0.17711461, 0.01103987, -0.08010859,
0.0436271 , 0.14234274, 0.17426814, 0.16266974, -0.05060372,
0.19175571, 0.20196328, 0.16857596, 0.10443308, 0.18311543,
0.01763848, 0.18615567, -0.04875274, 0.53825218, 0.05848671,
0.04679565, 0.31562785, 0.10181854, 0.5181803 , 0.14911037,
0.35414888, 0.35942839, 0.24117501, 0.07417005, 0.13916591,
0.26529946, 0.08928299, -0.05456868, 0.32101304, 0.0938359 ,
0.21039068, 0.24567504, 0.46506683, 0.11831966, 0.27722024,
0.06021621, 0.43127136, -0.0715464 , 0.22663568, 0.03272084,
0.15878571, 0.21849593, 0.09466811, 0.14378006, 0.217633 ,
-0.00639564, 0.02370049, 0.12724414, 0.04452184, 0.23269135,
0.15163962, 0.26413639, 0.53610729, 0.13078083, 0.37489765,
-0.05698025, 0.13745969, 0.28896174, 0.31270411, 0.03233224,
-0.01940844, 0.33060856, 0.10514329, -0.02133009, 0.24376887,
0.32185562, 0.2856154 , -0.10845687, 0.13873432, 0.00149298,
0.21956768, 0.28718085, 0.05868158, 0.29284815, 0.07614951,
0.04634742, 0.38159732, 0.33207598, -0.00388774, 0.2530142 ,
0.31612267, 0.38100539, 0.52909583, 0.0806722 , 0.25128497,
0.09521239, 0.00534904, 0.49269236, 0.32856155, 0.32696873,
0.36064074, 0.0546294 , 0.2509809 , 0.12967062, 0.15599954,
0.13963264, -0.12393534, 0.29273719, 0.37976292, 0.12581364,
0.18158315, -0.02695603, 0.20519083, 0.07355053, -0.01322105,
0.03425304, 0.16070607, 0.33705457, 0.30392952, 0.27480431,
0.28689683, -0.05820897, 0.19339178, 0.18979934, 0.17251068,
0.24901993, -0.10436346, 0.27491983, -0.09879457, 0.040643 ,
0.28533901, 0.56457591, 0.11608525, 0.28829086, 0.22340771,
0.01709513, 0.29241833, 0.24239142, 0.53541937, -0.07385175,
0.21711996, 0.13866988, 0.18275949, 0.03028679, 0.10956063,
0.11162148, 0.22370113, 0.29819573, -0.16202074, -0.08241599,
0.05195339, 0.00784795, 0.19928314, 0.25909829, 0.23955218,
0.28358317, 0.21996076, 0.09566008, 0.14599491, 0.0009498 ,
0.2835304 , 0.1299209 , 0.38921339, 0.32701586, 0.1721737 ,
0.12858167, 0.30896035, 0.30617785, -0.15816062, 0.12752759,
0.01243085, 0.33533922, 0.12879927, 0.36453542, 0.23508022,
-0.1466139 , 0.24639121, -0.20667964, 0.32581433, 0.13947642,
0.13158952, 0.45137318, 0.15420176, 0.40161435, 0.1157631 ,
0.02294896, 0.23434361, 0.11256453, 0.03310285, 0.24816994,
0.07408071, 0.33903586, 0.1647869 , 0.09296063, 0.23518843,
-0.05757715, 0.19073196, 0.19502373, -0.01113003, 0.25491531,
-0.2560317 , -0.02828648, 0.18158068, 0.01729974, 0.19472761,
0.20526791, -0.09548153, 0.33564205, 0.05435555, 0.23089882,
-0.08447158, 0.29845764, 0.03030467, 0.22037718, 0.30734621,
0.09957496, 0.00639825, 0.10024417, 0.4510534 , -0.02591716,
0.3022535 , 0.03001962, 0.17603079, 0.20227153, -0.17865485,
0.27503145, -0.1464023 , 0.33459748, -0.07585477, 0.22048685,
0.03687081, 0.18969765, 0.1110831 , 0.31271761, 0.41453678,
0.01800242, 0.08654666, 0.29966409, 0.04754104, 0.11777157,
0.4048086 , 0.15418205, 0.04629587, 0.04189527, 0.10132682,
-0.0504673 , 0.3235537 , 0.12502821, -0.01975565, -0.03378431,
0.29185542, 0.10341047, 0.21469468, 0.38576482, 0.31906509,
0.17836402, 0.27860474, 0.29086816, 0.24224872, 0.00283967,
0.22596665, 0.14805123, 0.06871467, 0.00684606, 0.02231177,
0.09735064, 0.12025689, 0.19786736, 0.24142977, -0.17153291,
0.30182055, 0.19593933, -0.07165104, 0.05909201, 0.15645822,
0.16625188, 0.16235541, 0.21541469, 0.22503392, 0.11917496,
-0.05708581])
from sklearn import metrics
print(metrics.r2_score(y_test,y_pred_la))
print(metrics.r2_score(y_train,ridgecv.predict(x_train)))
0.21120590171934883 0.20665483137162655
from sklearn.tree import DecisionTreeClassifier
dtc=DecisionTreeClassifier()
dtc.fit(x_train,y_train)
DecisionTreeClassifier()
pred=dtc.predict(x_test)
pred
array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0])
y_test
array([0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0])
#Accuracy score
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,roc_auc_score,roc_curve
accuracy_score(y_test,pred)
0.7551020408163265
confusion_matrix(y_test,pred)
array([[309, 62],
[ 46, 24]], dtype=int64)
pd.crosstab(y_test,pred)
| col_0 | 0 | 1 |
|---|---|---|
| row_0 | ||
| 0 | 309 | 62 |
| 1 | 46 | 24 |
print(classification_report(y_test,pred))
precision recall f1-score support
0 0.87 0.83 0.85 371
1 0.28 0.34 0.31 70
accuracy 0.76 441
macro avg 0.57 0.59 0.58 441
weighted avg 0.78 0.76 0.76 441
probability=dtc.predict_proba(x_test)[:,1]
# roc_curve
fpr,tpr,threshsholds = roc_curve(y_test,probability)
plt.plot(fpr,tpr)
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.title('ROC CURVE')
plt.show()
from sklearn.ensemble import RandomForestClassifier
rfc=RandomForestClassifier()
forest_params = [{'max_depth': list(range(10, 15)), 'max_features': list(range(0,14))}]
from sklearn.model_selection import GridSearchCV
rfc_cv= GridSearchCV(rfc,param_grid=forest_params,cv=10,scoring="accuracy")
rfc_cv.fit(x_train,y_train)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:372: FitFailedWarning:
50 fits failed out of a total of 700.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.
Below are more details about the failures:
--------------------------------------------------------------------------------
50 fits failed with the following error:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 450, in fit
trees = Parallel(
File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
if self.dispatch_one_batch(iterator):
File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
self._dispatch(tasks)
File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
result = ImmediateResult(func)
File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 572, in __init__
self.results = batch()
File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py", line 262, in __call__
return [func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py", line 262, in <listcomp>
return [func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\fixes.py", line 216, in __call__
return self.function(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 185, in _parallel_build_trees
tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 937, in fit
super().fit(
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 308, in fit
raise ValueError("max_features must be in (0, n_features]")
ValueError: max_features must be in (0, n_features]
warnings.warn(some_fits_failed_message, FitFailedWarning)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py:969: UserWarning: One or more of the test scores are non-finite: [ nan 0.84159528 0.84352751 0.84936227 0.85226537 0.85615839
0.85518751 0.85518751 0.85810965 0.84937179 0.84643061 0.85227489
0.84935275 0.85226537 nan 0.84062441 0.85032362 0.84935275
0.85421664 0.85422616 0.85323625 0.85421664 0.85323625 0.84838188
0.85421664 0.85516848 0.847411 0.85323625 nan 0.84353703
0.8483914 0.85226537 0.85227489 0.85033314 0.85032362 0.85323625
0.84742052 0.85421664 0.85128498 0.84838188 0.85420712 0.85517799
nan 0.84547877 0.85132305 0.85130402 0.85712926 0.84935275
0.8512945 0.85518751 0.85519703 0.84740148 0.84935275 0.85031411
0.84935275 0.84936227 nan 0.84644965 0.84743004 0.85519703
0.85810965 0.85130402 0.85324576 0.84936227 0.85323625 0.85420712
0.85031411 0.85420712 0.85225585 0.8580811 ]
warnings.warn(
GridSearchCV(cv=10, estimator=RandomForestClassifier(),
param_grid=[{'max_depth': [10, 11, 12, 13, 14],
'max_features': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13]}],
scoring='accuracy')
pred=rfc_cv.predict(x_test)
print(classification_report(y_test,pred))
precision recall f1-score support
0 0.87 0.98 0.92 371
1 0.70 0.20 0.31 70
accuracy 0.86 441
macro avg 0.78 0.59 0.62 441
weighted avg 0.84 0.86 0.82 441
rfc_cv.best_params_
{'max_depth': 10, 'max_features': 8}
rfc_cv.best_score_
0.8581096516276412